#!/usr/bin/perl -w
# Bug reports and comments to Igor.Ermolaev@intel.com
use strict;
no warnings "portable";

use constant { FALSE=>0, TRUE=>-1 };

my $focus_thread = undef;
if( $#ARGV >= 0 && $ARGV[0] =~ /^\d+$/ )
{
   $focus_thread = int $ARGV[0];
   shift @ARGV;
}

our $cur_tid = undef;
our @execs  = ();
our @list   = ();
our @instrs = ();
our @jumps  = ();
our @calls  = ();
our @total  = ();
our $prev_addr = undef;
our $need_next = undef;
my  $thrds_cnt = 1;

sub add_line
{
   my( $addr, $code, $instr ) = @_;
   my $t    = $cur_tid;
   my $exec = $execs[$t];
   if( $exec > 0 )
   {
      my $sum = 0; for (@execs) {$sum += $_;}
      $addr = uc $addr;$addr =~ s/^0+([[:xdigit:]]+)/$1/o;

      if( exists $instrs[$t]{$addr} )
      {
         $instrs[$t]{$addr}{exec} += $exec;;
      }
      else
      {
         push @{$list[$t]}, $addr;
         $instrs[$t]{$addr} = { exec=>$exec, code=>$code, instr=>$instr, prevs=>[], calls=>[], events=>{}, events_share=>( $exec / $sum ) };
      }
      $total[$t] += $exec;
      if( defined $need_next )
      {
         $need_next->{next} = $addr;
         $need_next         = undef;
      }
      if( defined $prev_addr )
      {
         $instrs[$t]{$addr}{prev} = $prev_addr;
         $prev_addr               = undef;
      }
      if( $instr =~ /^(hint-(not-taken|taken)\s+)?(jk\w+\s+k\d+\s*,\s*|j\w+\s+)(0x)?([[:xdigit:]]+)/io )
      {
         my $type   = $3;
         my $target = uc $5;$target =~ s/^0+([[:xdigit:]]+)/$1/o; # must be the last
            $target = substr( $addr, 0, length($addr) - length($target) ).$target if length($target) < length($addr);
         #print "$addr: $target\n";
         $jumps[$t]{$addr} = { target=>$target, is_cond=>( $type =~ /^jmp/ ? FALSE : TRUE ) };
         if( $jumps[$t]{$addr}{is_cond} == TRUE )
         {
            $need_next = $jumps[$t]{$addr};
            $prev_addr = $addr;
         }
      }
      elsif( $instr =~ /^rep/io )
      {
         $jumps[$t]{$addr} = { target=>$addr, is_cond=>TRUE };
         $need_next        = $jumps[$t]{$addr};
         $prev_addr        = $addr;
      }
      elsif( $instr =~ /^(call\w+)\s+(0x)?([[:xdigit:]]+)/io )
      {
         my $target = uc $3;$target =~ s/^0+([[:xdigit:]]+)/$1/o; # must be the last
            $target = substr( $addr, 0, length($addr) - length($target) ).$target if length($target) < length($addr);
         $calls[$t]{$addr} = { target=>$target };
         $prev_addr = $addr; # not correct but helps to find loops with calls to SVML etc.
      }
      else
      {
         $prev_addr = $addr if !/^ret/io;
      }
   }
   else
   {
      $need_next = undef;
      $prev_addr = undef;
   }
}

while( <STDIN> )
{
   if( /^\s*([[:xdigit:]]+)\:\s*\d+\s*\(\s*(\d+(\s*\+\d+)*)\)\s*\:\s*([[:xdigit:]]+)\s*([^[\n]+)\s*/io )
   {
      @execs     = map int, (split /\s*\+\s*/, $2);
      $thrds_cnt = $#execs+1 if $#execs+1 > $thrds_cnt;
      add_line $1, $4, $5;
   }
   if( /\s*EMIT_TOP_BLOCK_STATS\s+FOR\s+TID\s+(\d+)\s+(OS-TID\s+(\d+)\s+)?EMIT\s+/io )
   {
      $cur_tid   = int $1;
      $thrds_cnt = $cur_tid > $#execs ? $cur_tid+1 : $#execs+1;
      for( my $t = 0; $t < $thrds_cnt; $t++ )
      {
         $execs[$t]     = 0;
         $total[$t]     = 0  if !defined $total[$t];
         @{$list[$t]}   = () if !defined $list[$t];
         %{$instrs[$t]} = () if !defined $instrs[$t];
         %{$jumps[$t]}  = () if !defined $jumps[$t];
         %{$calls[$t]}  = () if !defined $calls[$t];
      }
   }
   elsif( /\s+ICOUNT\s*:\s*([0-9,]+)\s+EXECUTIONS\s*:\s*([0-9,]+)/io )
   {
      my $et = $2;$et =~ tr/,//d;
      $execs[$cur_tid] = int $et;
      $need_next = undef;
      $prev_addr = undef;
   }
   elsif( /^\s*XDIS\s*([[:xdigit:]]+)\s*:\s*\w+\s+([[:xdigit:]]+)\s*(.+)$/io )
   {
      add_line $1, $2, $3;
   }
   elsif( /^\s*\Q...\E/io )
   {
      $need_next = undef;
      $prev_addr = undef;
   }
}

for( my $t = 0; $t < $thrds_cnt; $t++ )
{
   next if scalar @{$list[$t]} == 0;
   push @{$list[$t]}, 'FFFFFFFF';
   $instrs[$t]{'FFFFFFFF'} = { exec=>1, code=>'00', instr=>'gap', prevs=>[], calls=>[], events=>{}, events_share=>(1.0/$thrds_cnt) };
   $total[$t] += 1;

    @{$list[$t]} = sort { hex $a <=> hex $b } @{$list[$t]};
      $prev_addr = undef;
   my $next_addr = undef;
   for my $addr (@{$list[$t]})
   {
      my $i = $instrs[$t]{$addr};
      if( defined $next_addr && $next_addr == hex $addr )
      {
         $i->{prev} = $prev_addr if !exists $i->{prev};
         $need_next->{next} = $addr if defined $need_next;
      }
      $prev_addr = $addr;
      $next_addr = exists $calls[$t]{$addr} || $i->{instr} =~ /^ret/io ? undef : ( hex $addr ) + ( length( $i->{code} ) >> 1 );
      $need_next = exists $jumps[$t]{$addr} && $jumps[$t]{$addr}{is_cond} == TRUE ? $jumps[$t]{$addr} : undef;
   }
   while( my ( $addr, $r ) = each %{$jumps[$t]} )
   {
      push @{($instrs[$t]{$r->{target}}{prevs})}, $addr if exists $instrs[$t]{$r->{target}};
   }
   while( my ( $addr, $r ) = each %{$calls[$t]} )
   {
      push @{($instrs[$t]{$r->{target}}{calls})}, $addr if exists $instrs[$t]{$r->{target}};
   }
}

sub in_edges
{
   my( $t, $ignore ) = @_;
   my $c   = $cur_tid;
   my $sum = 0;
   for my $prev ( @{$t->{prevs}} )
   {
      my $j = $jumps[$c]{$prev};
      if( $j->{is_cond} == FALSE )
      {
         $sum += $instrs[$c]{$prev}{exec};
      }
      elsif( exists $j->{taken} )
      {
         $sum += $j->{taken};
      }
      elsif( !defined $ignore || $prev ne $ignore )
      {
         $sum = undef;
         last;
      }
   }
   return $sum;
}

for( my $c = 0; $c < $thrds_cnt; $c++ )
{
   $cur_tid = $c;

   my $changing;
   my $seen_unresolved;

   do
   {
      $changing = FALSE;
      $seen_unresolved = FALSE;
      while( my ( $addr, $r ) = each %{$jumps[$c]} )
      {
         if( $r->{is_cond} == TRUE && !exists $r->{taken} )
         {
            if( exists $r->{next} )
            {
               if( $r->{target} eq $r->{next} )
               {
                  $r->{taken} = $instrs[$c]{$addr}{exec};
                  $changing   = TRUE;
               }
               else
               {
                  my $sum = in_edges $instrs[$c]{$r->{next}};
                  if( defined $sum )
                  {
                     $r->{taken} = $instrs[$c]{$addr}{exec} - ( $instrs[$c]{$r->{next}}{exec} - $sum );
                     $changing   = TRUE;
                     #print "$addr $r->{is_cond} $r->{taken} $instrs[$c]{$addr}{exec} $instrs[$c]{$r->{next}}{exec} $sum $r->{next}\n";
                  }
                  else
                  {
                     $seen_unresolved = TRUE;
                  }
               }
            }
            else
            {
               if( !exists $instrs[$c]{$r->{target}} )
               {
                  $r->{taken} = 0;
                  $changing   = TRUE;
               }
               else
               {
                  my $t   = $instrs[$c]{$r->{target}};
                  my $sum = in_edges $t, $addr;
                  if( defined $sum && exists $t->{prev} )
                  {
                     $sum += $instrs[$c]{$t->{prev}}{exec};
                     if( exists $jumps[$c]{$t->{prev}} )
                     {
                        if( exists $jumps[$c]{$t->{prev}}{taken} )
                        {
                           $sum -= $jumps[$c]{$t->{prev}}{taken};
                        }
                        else
                        {
                           $sum = undef;
                        }
                     }
                  }
                  if( defined $sum )
                  {
                     $r->{taken} = $t->{exec} - $sum;
                     $r->{taken} = $instrs[$c]{$addr}{exec} if $r->{taken} > $instrs[$c]{$addr}{exec};
                     $changing   = TRUE;
                  }
                  else
                  {
                     $seen_unresolved = TRUE;
                  }
               }
            }
         }
      }
      if( $changing == FALSE && $seen_unresolved == TRUE )
      {
         my $c_not = undef;
         my $e_not = undef;
         my $c_fwd = undef;
         my $e_fwd = undef;
         my $c_sml = undef;
         my $e_sml = undef;
         my $a_sml = undef;
         while( my ( $addr, $r ) = each %{$jumps[$c]} )
         {
            if( $r->{is_cond} == TRUE && !exists $r->{taken} )
            {
               my $i = $instrs[$c]{$addr};
               if( !exists $instrs[$c]{$r->{target}} && ( !defined $c_not || $e_not > $i->{exec} ) )
               {
                  $c_not = $r;
                  $e_not = $i->{exec};
               }
               if( hex $r->{target} > hex $addr && ( !defined $c_fwd || $e_fwd > $i->{exec} ) )
               {
                  $c_fwd = $r;
                  $e_fwd = $i->{exec};
               }
               if( !defined $c_sml || $e_sml > $i->{exec} || ( $e_sml == $i->{exec} && hex $a_sml < hex $addr ) )
               {
                  $c_sml = $r;
                  $e_sml = $i->{exec};
                  $a_sml = $addr;
               }
            }
         }
         if( defined $c_not )
         {
            $c_not->{taken} = 0;
         }
         elsif( defined $c_fwd )
         {
            $c_fwd->{taken} = 0;
         }
         else
         {
            $c_sml->{taken} = $instrs[$c]{$a_sml}{exec} >> 1;
         }
         $changing = TRUE;
      }
   }
   while( $changing == TRUE );

   while( my ( $addr, $r ) = each %{$jumps[$c]} )
   {
      my $tkn = 0;
      if( $r->{is_cond} == FALSE )
      {
         $tkn = $instrs[$c]{$addr}{exec};
      }
      elsif( exists $r->{taken} )
      {
         $tkn = $r->{taken};
      }
      if( $tkn > 0 )
      {
         if( exists $instrs[$c]{$r->{target}} )
         {
            if( $instrs[$c]{$r->{target}}{instr} eq 'gap' )
            {
               $instrs[$c]{$r->{target}}{exec} += $tkn;
               $total[$c] += $tkn;
            }
         }
         else
         {
            push @{$list[$c]}, $r->{target};
            $instrs[$c]{$r->{target}} = { exec=>$tkn, code=>'00', instr=>'gap', prevs=>[], calls=>[], events=>{}, events_share=>(1.0/$thrds_cnt) };
            $total[$c] += $tkn;
         }
      }
   }
   @{$list[$c]} = sort { hex $a <=> hex $b } @{$list[$c]};
}

#for my $addr (keys %jumps)
#{
   #print "$addr:", exists $jumps{$addr}{taken} ? $jumps{$addr}{taken} : $jumps{$addr}{is_cond} == FALSE ? "direct" : "not found", "\n";
#}

for my $stat ( @ARGV )
{

   my %EVENTS = ( "l1 dcache misses"=>"L1d_M", "l2 cache misses"=>"L2_M", "dtlb 4k misses"=>"DTLB_M", "dependency stalls"=>"DPF", "bank conflicts"=>"bank_conflict", "split cacheline accesses"=>"splt_line",
                  "cache-line-crossings"=>"CLC", "page-crossings"=>"PGC" );
   my $cur_event  = undef;
   my $cur_tid    = undef;
   my $till_empty = FALSE;

   open STATS, "<$stat" or die;
   while( <STATS> )
   {
      if( /top\s+\d+\s+static\s+(.+)/io )
      {
         my $key = lc $1;
         $cur_event = exists $EVENTS{$key} ? $EVENTS{$key} : undef;
      }
      elsif( defined $cur_event && /([[:xdigit:]]+)\:\s*[^:]+\:\s*(\d+)\s+/io )
      {
         #print "$cur_event: $1\n";
         my $cnt  = int $2;
         my $addr = uc $1;$addr =~ s/^0+([[:xdigit:]]+)/$1/o; # must be the last
         $instrs[$cur_tid]{$addr}{events}{$cur_event} = $cnt if exists $instrs[$cur_tid]{$addr};
      }
      elsif( /^\s*\#\s*LIST\s+OF\s+MISALIGNED\s+INSTRUCTIONS\s+FOR\s+(\S+)/io )
      {
         my     $key = lc $1;
          $cur_event = exists $EVENTS{$key} ? $EVENTS{$key} : undef;
         $till_empty = TRUE;
      }
      elsif( /^\s*(\d+)\s+(\d+)\s+(\d+)\s+(0x)?([[:xdigit:]]+)/io )
      {
         $cur_tid = int $1;
         my $cnt  = int $2 + int $3;
         my $addr = uc $5;$addr =~ s/^0+([[:xdigit:]]+)/$1/o; # must be the last
         $instrs[$cur_tid]{$addr}{events}{$cur_event} = $cnt if exists $instrs[$cur_tid]{$addr};
      }
      elsif( /^\-+$/io )
      {
         $cur_event = undef;
      }
      elsif( /^\s*\$+\s*TID\s*\:\s*(\d+)/io )
      {
         $cur_tid = int $1;
      }
      elsif( /^\s*(0x)?([[:xdigit:]]+)\s+(\d+)\s*(load|store)/ )
      {
          my $cnt  = int $3;
          my $addr = uc $2;$addr =~ s/^0+([[:xdigit:]]+)/$1/o; # must be the last
          $instrs[$cur_tid]{$addr}{events}{MALG} = $cnt if exists $instrs[$cur_tid]{$addr};
      }
      elsif( /^\s*$/o )
      {
         $cur_event  = undef if $till_empty == TRUE;
         $till_empty = FALSE;
      }
   }
   close STATS;
}

for( my $t = 0; $t < $thrds_cnt; $t++ )
{
   next if scalar @{$list[$t]} == 0;
   next if defined $focus_thread && $focus_thread != $t;

   print "Processor   0\nCore        0\nThread      $t\n";
   printf "Function 0x%08s\n", lc $list[$t][0];

   my $first_idx = 0;
   my $first_max = 0;
   my $last_idx  = $#{$list[$t]}; #maybe gap idx?
   my $last_max  = 0;
   my $cur_idx   = 0;
   for my $addr (@{$list[$t]})
   {
      my $i   = $instrs[$t]{$addr};
      my $sum = 0;
      for my $prev (@{$i->{prevs}})
      {
         my $j = $jumps[$t]{$prev};
         my $n = $j->{is_cond} == FALSE ? $instrs[$t]{$prev}{exec} : exists $j->{taken} ? $j->{taken} : 0;
         $sum += $n if $n > 0;
      }
      if( exists $i->{prev} )
      {
         my $cnt = $instrs[$t]{$i->{prev}}{exec};
         if( exists $jumps[$t]{$i->{prev}} )
         {
            my $j = $jumps[$t]{$i->{prev}};
            my $a = $j->{is_cond} == FALSE ? $cnt : exists $j->{taken} ? $j->{taken} : 0;
            $cnt -= $a;
         }
         my $n = $i->{exec} - $sum - $cnt;
         if( $n == 1 && $i->{exec} > $first_max )
         {
            $first_idx = $cur_idx;
            $first_max = $i->{exec};
         }
         if( $n == -1 && $i->{exec} >= $last_max )
         {
            $last_idx = $cur_idx > 1 ? $cur_idx - 1 : $#{$list[$t]};
            $last_max = $i->{exec};
         }
      }
      $cur_idx++;
   }
   my $first_ofs = $#{$list[$t]} + 1 - $first_idx;
   my $last_ofs  = $total[$t]    - 1 - $last_idx;

   my $first  = 1;
   $prev_addr = undef;
   my $next_addr = undef;
   my $prev_exec = undef;
 
   for my $addr (@{$list[$t]})
   {
      my $i = $instrs[$t]{$addr};
      if( defined $prev_addr && ( !exists $i->{prev} && !@{$i->{prevs}} ) )
      {
         printf "0x%08s: gap\t; b=0x00, n=%d\n", lc $next_addr, $prev_exec if defined $next_addr;
         printf "    case next_ip=0xffffffff: n=%d\n", $prev_exec;
      }
      printf "...\n" if $first != 1 && !exists $i->{prev};
      printf "0x%08s: ", lc $addr;
      print $i->{instr}, "\t; b=0x", $i->{code}, ", n=", $i->{exec};
      my $first_adj  = $first + $first_ofs;
         $first_adj -= $#{$list[$t]} + 1 if $first_adj > $#{$list[$t]} + 1;
      my  $last_adj  = $first + $last_ofs;
          $last_adj -= $#{$list[$t]} + 1 if $last_adj > $total[$t];
      my     $ratio  = $i->{exec} * 100 / $total[$t];printf "(%.1f%%)", $ratio if $ratio >= 0.1;print ", first=", $first_adj, ", last=", $last_adj;
      print "\n";
      print "    case beginning_of_sim: n=1, first=1\n" if $first_adj == 1;
      $prev_addr = $addr;
      $next_addr = $i->{instr} =~ /^(cmp|or|and|test)/io ? sprintf '%X', ( hex $addr ) + ( length( $i->{code} ) >> 1 ) : undef;
      $prev_exec = $i->{exec};
      while( my ( $event, $cnt ) = each %{$i->{events}} )
      {
         $cnt = int ( $cnt * $i->{events_share} );
         if( $cnt > 0 )
         {
            print "    case events=($event): n=$cnt";
            my $ratio = $cnt * 100.0 / $total[$t];printf "(%.1f%%)", $ratio if $ratio >= 0.1;print "\n";
         }
      }

      #printf "    case next_ip=0x%08s: n=0\n", lc $addr if $i->{instr} =~ /^(ret|jmp\w*\s+\*)/io;
      my $sum = 0;
      for my $prev (@{$i->{prevs}})
      {
         my $j = $jumps[$t]{$prev};
         my $n = $j->{is_cond} == FALSE ? $instrs[$t]{$prev}{exec} : exists $j->{taken} ? $j->{taken} : 0;
         if( $n > 0 )
         {
            printf "    case prev_ip=0x%08s: n=%d", lc $prev, $n;
            my $ratio = $n * 100.0 / $total[$t];printf "(%.1f%%)", $ratio if $ratio >= 0.1;print "\n";
            $sum += $n;
         }
      }
      if( exists $i->{prev} )
      {
         my $cnt = $instrs[$t]{$i->{prev}}{exec};
            $cnt-- if $list[$t][$last_idx] eq $i->{prev};
         if( exists $jumps[$t]{$i->{prev}} )
         {
            my $j = $jumps[$t]{$i->{prev}};
            my $a = $j->{is_cond} == FALSE ? $cnt : exists $j->{taken} ? $j->{taken} : 0;
            $cnt -= $a;
         }
         my $n = $i->{exec} - $sum - $cnt;
         if( ( $sum > 0 || $n > 0 ) && $cnt > 0 )
         {
            printf "    case prev_ip=0x%08s: n=%d", lc $i->{prev}, $cnt;
            my $ratio = $cnt * 100.0 / $total[$t];printf "(%.1f%%)", $ratio if $ratio >= 0.1;print "\n";
         }
         if( $n > 1 || $n == 1 && $first_adj != 1 )
         {
            printf "    case prev_ip=0xffffffff: n=%d", $n;
            my $ratio = $n * 100.0 / $total[$t];printf "(%.1f%%)", $ratio if $ratio >= 0.1;print "\n";
         }
      }
      if( exists $jumps[$t]{$addr} )
      {
         my $j = $jumps[$t]{$addr};
         if( $j->{is_cond} == FALSE )
         {
            printf "    case next_ip=0x%08s: n=%d", lc $j->{target}, $i->{exec};
            my  $ratio = $i->{exec} * 100.0 / $total[$t];printf "(%.1f%%)", $ratio if $ratio >= 0.1;print "\n";
            $prev_addr = undef;
         }
         else
         {
            #print $j->{target}, " ", $j->{next}, " ", $j->{taken}, "\n";
            if( exists $j->{taken} && $j->{taken} > 0 )
            {
               printf "    case next_ip=0x%08s: n=%d", lc $j->{target}, $j->{taken};
               my  $ratio = $j->{taken} * 100.0 / $total[$t];printf "(%.1f%%)", $ratio if $ratio >= 0.1;print "\n";
               $prev_addr = undef;
               if( exists $j->{next} )
               {
                  my $skipped = $i->{exec} - $j->{taken};
                  if( $skipped > 0 )
                  {
                     printf "    case next_ip=0x%08s: n=%d", lc $j->{next}, $skipped;
                     my $ratio = $skipped * 100.0 / $total[$t];printf "(%.1f%%)", $ratio if $ratio >= 0.1;print "\n";
                  }
               }
            }
         }
      }
      print "    case end_of_sim: n=1, first=$last_adj\n" if $last_adj == $total[$t];
      #printf "    case prev_ip=0x%08s: n=0\n", lc $addr if !exists $i->{prev} && !@{$i->{prevs}};
      $first++;
   }
 
   #print "0xffffffff: gap\t; b=0x00, n=1\n";
   printf "Function 0x%08s summary: n=%d(100%%)\n", lc $list[$t][0], $total[$t];
}

exit( 0 );
